Wstęp
Eksploracja danych
Korzystamy ze zbioru danych z serwisu Kaggle: https://www.kaggle.com/competitions/gan-getting-started.
Zbiór zawiera:
Na potrzeby projektu korzystamy wyłącznie z katalogu monet_jpg.
Do wykonania projektu wykorzystaliśmy:
Dane - wczytanie i konwersja obrazów do wektorów
class Dataset(Dataset):  # NOTE(review): shadows the imported base class name; kept for caller compatibility
    """Monet image dataset.

    Loads every file from ``img_dir`` and yields 64x64 RGB tensors
    normalized to [-1, 1] (matching the generator's Tanh output range).
    """

    def __init__(self, img_dir):
        # Build absolute paths for every image in the directory.
        base = os.path.abspath(img_dir)
        self.img_list = [os.path.join(base, name) for name in os.listdir(img_dir)]
        # Resize the shorter side to 64, center-crop to 64x64,
        # convert to a [0, 1] tensor, then shift/scale to [-1, 1].
        self.transform = transforms.Compose([
            transforms.Resize(64),
            transforms.CenterCrop(64),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ])

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, index):
        # Source images are 256x256; force RGB in case of other modes.
        img = Image.open(self.img_list[index]).convert('RGB')
        return self.transform(img)
# Smoke test: load the dataset and inspect one sample's tensor shape.
db = Dataset('monet_jpg')
db[0].shape
# (captured notebook output of the cell above)
torch.Size([3, 64, 64])
Generator
Generator(100, 128)
Generator(
(main): Sequential(
(0): ConvTranspose2d(100, 1024, kernel_size=(4, 4), stride=(1, 1), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): ConvTranspose2d(1024, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): ReLU(inplace=True)
(9): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): ReLU(inplace=True)
(12): ConvTranspose2d(128, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(13): Tanh()
)
)
Dyskryminator
Discriminator(128)
Discriminator(
(main): Sequential(
(0): Conv2d(3, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(1): LeakyReLU(negative_slope=0.2, inplace=True)
(2): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(4): LeakyReLU(negative_slope=0.2, inplace=True)
(5): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(6): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.2, inplace=True)
(8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
(9): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(10): LeakyReLU(negative_slope=0.2, inplace=True)
(11): Conv2d(1024, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
(12): Sigmoid()
)
)
DCGAN
class DCGAN:
    """Deep Convolutional GAN wrapper.

    Owns the generator/discriminator pair, applies the DCGAN-paper weight
    initialization, and runs the adversarial training loop.
    """

    def __init__(self, noise_size, img_dim):
        self.noise_size = noise_size
        self.D = Discriminator(img_dim)
        self.G = Generator(noise_size, img_dim)
        # Prefer GPU when available. (Original split this conditional
        # expression across lines without parentheses — a SyntaxError.)
        self.device = (torch.device("cuda") if torch.cuda.is_available()
                       else torch.device("cpu"))
        self.D.to(self.device)
        self.G.to(self.device)
        self.D.apply(self.weights_init)
        self.G.apply(self.weights_init)

    def weights_init(self, m):
        """DCGAN init: conv weights ~ N(0, 0.02); batch-norm weights
        ~ N(1, 0.02) with zero bias."""
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.normal_(m.weight.data, 0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0)

    def describe(self):
        """Print both network architectures."""
        print('Discriminator')
        print(self.D)
        print('\nGenerator')
        print(self.G)

    def __calculate_loss(self, output, labels):
        # BCE on the flattened discriminator output.
        criterion = nn.BCELoss()
        return criterion(output.squeeze(), labels)

    def real_loss(self, D_out):
        """BCE loss against smoothed 'real' labels (0.8 instead of 1.0)."""
        batch_size = D_out.size(0)
        labels = torch.ones(batch_size).to(self.device) * 0.8
        return self.__calculate_loss(D_out, labels)

    def fake_loss(self, D_out):
        """BCE loss against smoothed 'fake' labels (0.1 instead of 0.0)."""
        batch_size = D_out.size(0)
        labels = torch.ones(batch_size).to(self.device) * 0.1
        return self.__calculate_loss(D_out, labels)

    def noise(self, size):
        """Latent vectors sampled uniformly from [-1, 1], shape ``size``."""
        z = np.random.uniform(-1, 1, size=size)
        return torch.from_numpy(z).float().to(self.device)

    def train_generator(self, g_optim, size):
        """One generator step: push D to label fresh fakes as real."""
        g_optim.zero_grad()
        z = self.noise(size)
        fake_images = self.G(z)
        d_fake = self.D(fake_images)
        # Generator improves when the discriminator calls fakes real.
        g_loss = self.real_loss(d_fake)
        g_loss.backward()
        g_optim.step()
        return g_loss.item()

    def train_discriminator(self, d_optim, real_images, size):
        """One discriminator step on a real batch plus a fresh fake batch."""
        d_optim.zero_grad()
        d_real = self.D(real_images.to(self.device)).view(-1)
        d_real_loss = self.real_loss(d_real)
        z = self.noise(size)
        # Detach: the D update must not backpropagate through G.
        fake_images = self.G(z).detach()
        d_fake = self.D(fake_images)
        d_fake_loss = self.fake_loss(d_fake)
        d_loss = d_real_loss + d_fake_loss
        d_loss.backward()
        d_optim.step()
        return d_loss.item()

    def train(self, num_epochs, d_optim, g_optim, data_loader,
              z_size, sample_size, print_every=500):
        """Run the full adversarial training loop.

        Returns ``(samples, losses)``: per-epoch generator outputs from a
        fixed noise batch, and the ``(d_loss, g_loss)`` of each epoch's
        last batch. Samples are also pickled to 'DCGAN_Sample_Output.pkl'.
        """
        samples, losses = [], []
        # Fixed noise so per-epoch samples are directly comparable.
        z = self.noise((sample_size, z_size))
        self.D.train()
        self.G.train()
        print(f'Running on {self.device}')
        for epoch in range(num_epochs):
            for i, real_images in enumerate(data_loader):
                d_loss = self.train_discriminator(d_optim, real_images,
                                                  (sample_size, z_size))
                g_loss = self.train_generator(g_optim, (sample_size, z_size))
                if i % print_every == 0:
                    print('Epoch [{:5d}/{:5d}] | d_loss {:6.4f} | g_loss {:6.4f}'.format(
                        epoch + 1,
                        num_epochs,
                        d_loss,
                        g_loss
                    ))
            losses.append((d_loss, g_loss))
            # Sample from the fixed noise without tracking gradients,
            # so the stored tensors do not retain autograd graphs.
            self.G.eval()
            with torch.no_grad():
                samples.append(self.G(z))
            self.G.train()
        with open('DCGAN_Sample_Output.pkl', 'wb') as f:
            pkl.dump(samples, f)
        return samples, losses
Trening
# Dataset and loader
monet_dataset = Dataset('monet_jpg')  # directory containing the Monet JPEGs
data_loader = DataLoader(monet_dataset, batch_size=16, shuffle=True)

noise_size = 128  # length of the latent noise vector
img_size = 64     # feature-map scale passed to G and D

# Model
dcgan_model = DCGAN(noise_size, img_size)

# Optimizers (DCGAN-paper settings: lr=2e-4, beta1=0.5)
lr = 0.0002
beta1 = 0.5
beta2 = 0.999
d_optimizer = optim.Adam(dcgan_model.D.parameters(), lr, [beta1, beta2])
g_optimizer = optim.Adam(dcgan_model.G.parameters(), lr, [beta1, beta2])

# Training
EPOCHS = 30
sample_size = 16  # number of sample images generated per epoch
# Pass sample_size instead of the magic literal 16 the original used here.
sample_result, losses_history = dcgan_model.train(EPOCHS, d_optimizer,
                                                  g_optimizer, data_loader,
                                                  noise_size, sample_size,
                                                  print_every=1000)
Running on cpu Epoch [ 1/ 30] | d_loss 1.3821 | g_loss 4.3248 Epoch [ 2/ 30] | d_loss 1.1113 | g_loss 5.0841 Epoch [ 3/ 30] | d_loss 1.4034 | g_loss 8.0827 Epoch [ 4/ 30] | d_loss 2.2660 | g_loss 9.0380 Epoch [ 5/ 30] | d_loss 0.9263 | g_loss 1.0833 Epoch [ 6/ 30] | d_loss 1.6072 | g_loss 11.2540 Epoch [ 7/ 30] | d_loss 0.9794 | g_loss 4.9714 Epoch [ 8/ 30] | d_loss 0.9773 | g_loss 3.2008 Epoch [ 9/ 30] | d_loss 1.3099 | g_loss 6.3319 Epoch [ 10/ 30] | d_loss 1.0572 | g_loss 4.0379 Epoch [ 11/ 30] | d_loss 0.8879 | g_loss 2.0297 Epoch [ 12/ 30] | d_loss 1.0964 | g_loss 2.8637 Epoch [ 13/ 30] | d_loss 0.9980 | g_loss 1.5379 Epoch [ 14/ 30] | d_loss 0.8878 | g_loss 1.7146 Epoch [ 15/ 30] | d_loss 1.0334 | g_loss 2.5858 Epoch [ 16/ 30] | d_loss 0.9091 | g_loss 1.4181 Epoch [ 17/ 30] | d_loss 1.0753 | g_loss 2.5127 Epoch [ 18/ 30] | d_loss 1.1008 | g_loss 2.6952 Epoch [ 19/ 30] | d_loss 0.9950 | g_loss 2.0705 Epoch [ 20/ 30] | d_loss 1.0197 | g_loss 3.9312 Epoch [ 21/ 30] | d_loss 1.0382 | g_loss 3.7831 Epoch [ 22/ 30] | d_loss 0.9258 | g_loss 2.4845 Epoch [ 23/ 30] | d_loss 1.0808 | g_loss 4.5152 Epoch [ 24/ 30] | d_loss 1.0624 | g_loss 2.9008 Epoch [ 25/ 30] | d_loss 1.1572 | g_loss 3.0703 Epoch [ 26/ 30] | d_loss 1.3712 | g_loss 1.5263 Epoch [ 27/ 30] | d_loss 1.0455 | g_loss 1.5280 Epoch [ 28/ 30] | d_loss 1.2308 | g_loss 1.6251 Epoch [ 29/ 30] | d_loss 1.1049 | g_loss 2.1429 Epoch [ 30/ 30] | d_loss 1.1599 | g_loss 2.2009
Wyniki
Video("normal0_001.mp4")
Video("normal-11.mp4")
Video("normal0100.mp4")
Video("normal11.mp4")
Wnioski
Dziękujemy za uwagę!
¶